# Analysis Functions ------------------------------------------------------

# Source the helper scripts for event independence and severity
source(file.path(iPath, "Independence_Function_1.R"))
source(file.path(iPath, "Severity.R"))

# Converts site numbers to character and left-pads 7- and 9-character values
# with a single "0" (giving 8 and 10 characters respectively).
#
# Numeric input is rendered in fixed notation first, so that a value such as
# 1000000 becomes "1000000" rather than "1e+06" before its length is checked.
# NA entries stay NA.
.Pad_Site_No <- function(Site_No) {
  if (is.numeric(Site_No)) {
    Padded <- rep(NA_character_, length(Site_No))
    Known <- !is.na(Site_No)
    Padded[Known] <- format(Site_No[Known], scientific = FALSE, trim = TRUE)
  } else {
    Padded <- as.character(Site_No)
  }
  # which() drops the NA that nchar() returns for missing strings.
  Short <- which(nchar(Padded) %in% c(7L, 9L))
  Padded[Short] <- paste0("0", Padded[Short])
  Padded
}

# Flags local peaks from the sign of the day-to-day discharge change.
#
# A day is a peak when its score is 1 (rise) and the next non-zero score,
# after at most two 0 (flat) days, is -1 (fall). Look-ahead positions beyond
# the end of the series never match, so a peak close to the end of the window
# is still flagged whenever the days needed to confirm it are available.
#
# Score: numeric vector of -1 / 0 / 1.
# Returns a logical vector of the same length with no NA.
.Flag_Peaks <- function(Score) {
  N <- length(Score)
  Ahead <- function(Steps) {
    c(Score[-seq_len(Steps)], rep(NA, Steps))[seq_len(N)]
  }
  Next_1 <- Ahead(1L)
  Next_2 <- Ahead(2L)
  Next_3 <- Ahead(3L)

  Peak <- Score == 1 & (
    Next_1 == -1 |
      (Next_1 == 0 & Next_2 == -1) |
      (Next_1 == 0 & Next_2 == 0 & Next_3 == -1)
  )
  # NA means the look-ahead ran off the series (or the score was missing).
  Peak[is.na(Peak)] <- FALSE
  Peak
}

# Scans one site's daily series for peak-over-threshold events.
#
# An event opens on the first day with Discharge >= Q2 and closes on the first
# later day with Discharge < Q2. It is recorded only if at least one flagged
# peak with Discharge >= Q2 occurred while it was open; the earliest of the
# largest such peaks is reported. An event still open at the end of the series
# is not recorded.
#
# Returns a data frame with columns Site_ID, Base_Time (index of the day
# before the event opened), Peak_Time, End_Time and Peak_Discharge, or an
# empty data.frame() when Q2 is NA or no event was recorded.
.Extract_POT_Events <- function(Site_ID, Date_Index, Discharge, Peak, Q2) {
  if (is.na(Q2)) {
    return(data.frame())
  }

  N <- length(Discharge)
  Open_Rows <- integer(N)
  Peak_Rows <- integer(N)
  Close_Rows <- integer(N)
  Count <- 0L

  Above <- FALSE
  Open_Row <- NA_integer_
  Best_Row <- NA_integer_

  for (Row in seq_len(N)) {
    Q <- Discharge[Row]

    if (!Above && Q2 <= Q) {
      Above <- TRUE
      Open_Row <- Row
    }

    # Strict ">" keeps the earliest of several equally large peaks.
    if (Peak[Row] && Q >= Q2 &&
        (is.na(Best_Row) || Q > Discharge[Best_Row])) {
      Best_Row <- Row
    }

    if (Above && Q2 > Q) {
      Above <- FALSE
      if (!is.na(Best_Row)) {
        Count <- Count + 1L
        Open_Rows[Count] <- Open_Row
        Peak_Rows[Count] <- Best_Row
        Close_Rows[Count] <- Row
        Best_Row <- NA_integer_
      }
    }
  }

  if (Count == 0L) {
    return(data.frame())
  }

  Keep <- seq_len(Count)
  data.frame(
    Site_ID = Site_ID,
    Base_Time = Date_Index[Open_Rows[Keep]] - 1,
    Peak_Time = as.numeric(Date_Index[Peak_Rows[Keep]]),
    End_Time = Date_Index[Close_Rows[Keep]],
    Peak_Discharge = Discharge[Peak_Rows[Keep]]
  )
}

# Runs the per-site and basin-wide flood-event analysis for one HUC8 basin.
#
# For every site of the basin that has a "<Site_No>_Daily_Discharge.csv" file
# in iPath_Out_Daily, the daily series is clipped to [Start, End], missing
# days are filled with 0 discharge, peaks are flagged, and events exceeding
# the site's Q2 threshold are extracted. Site events are passed through
# Independence_1(), pooled into basin events, and summarised with Severity().
# The site events, the basin independence table and the basin severity are
# printed.
#
# Arguments:
#   HUC8   Basin code; compared numerically with the HUC8 column of the
#          site metadata file.
#   Start  First day of the analysis window (anything as.Date() can parse
#          after as.character()).
#   End    Last day of the analysis window (inclusive).
#   FLAG   TRUE: the temporal window of a site is the rounded median of its
#          events' BtoP / PtoB. Otherwise BTOP / PTOB are used.
#   BTOP   Base-to-peak window used when FLAG is not TRUE.
#   PTOB   Peak-to-base window used when FLAG is not TRUE.
#
# Reads the globals iPath_Out_Meta, Meta_SiteHUC8, Return_Period_HUC8 and
# iPath_Out_Daily, and calls Independence_1() and Severity(), all of which are
# defined outside this function.
#
# Returns a list, named by site ID, of the processed daily data frames
# (original columns plus Date_Index, DeltaQ, Score_Q and Peak).
#
# Stops with an error when the inputs are invalid or when no site of the
# basin has a daily discharge file.
Analysis_Individual <- function(HUC8, Start, End, FLAG, BTOP, PTOB) {

  HUC8 <- as.numeric(paste0(HUC8))
  Start <- as.Date(as.character(Start))
  End <- as.Date(as.character(End))

  if (length(HUC8) != 1L || is.na(HUC8)) {
    stop("HUC8 must be a single numeric basin code.", call. = FALSE)
  }
  if (length(Start) != 1L || length(End) != 1L ||
      is.na(Start) || is.na(End) || Start > End) {
    stop("Start and End must be single valid dates with Start <= End.",
         call. = FALSE)
  }
  if (length(FLAG) != 1L || is.na(FLAG)) {
    stop("FLAG must be a single TRUE/FALSE value.", call. = FALSE)
  }
  Use_Median <- isTRUE(FLAG == TRUE)

  # Organizing and prepping the inputs to be compared to one another
  Site_MetaFile <- read.csv(file.path(iPath_Out_Meta, Meta_SiteHUC8),
                            header = TRUE, sep = ",")
  Site_MetaFile <- Site_MetaFile[order(Site_MetaFile$HUC8), , drop = FALSE]
  Site_MetaFile$Site_No <- .Pad_Site_No(Site_MetaFile$Site_No)
  Site_MetaFile$HUC8 <- as.numeric(paste0(Site_MetaFile$HUC8))
  Site_MetaFile$HUC8[is.na(Site_MetaFile$HUC8)] <- 0

  # Reading in the threshold values
  Return_Period <- read.csv(file.path(iPath_Out_Meta, Return_Period_HUC8),
                            header = TRUE, sep = ",")
  Return_Period$Site_No <- .Pad_Site_No(Return_Period$Site_No)
  if (is.null(Return_Period$Q2)) {
    stop("The return-period file has no Q2 column.", call. = FALSE)
  }

  # Sites of the basin that actually have a daily discharge file.
  # unique(): a site listed twice would otherwise be analysed twice and then
  # fail when its ID is used as a row name.
  Daily_Path <- function(Site) {
    file.path(iPath_Out_Daily, paste0(Site, "_Daily_Discharge.csv"))
  }
  Basin_Sites <- unique(
    Site_MetaFile$Site_No[which(Site_MetaFile$HUC8 == HUC8)]
  )
  Has_Daily <- vapply(
    Basin_Sites,
    function(Site) file.exists(Daily_Path(Site)),
    logical(1),
    USE.NAMES = FALSE
  )
  Basin_Sites <- Basin_Sites[Has_Daily]
  if (length(Basin_Sites) == 0L) {
    stop("No site with a daily discharge file was found for HUC8 ", HUC8, ".",
         call. = FALSE)
  }

  Site_List <- data.frame(Site_ID = Basin_Sites)
  Site_List$Q2 <- Return_Period$Q2[match(Site_List$Site_ID,
                                         Return_Period$Site_No)]

  # One row per day of the window; Date_Index runs from 1 to the number of days
  Day_Indicies <- data.frame(Date = seq.Date(Start, End, by = "day"))
  Day_Indicies$Date_Index <- seq.int(nrow(Day_Indicies))

  N_Sites <- nrow(Site_List)
  Data_List <- vector("list", N_Sites)
  Site_Events <- vector("list", N_Sites)
  Site_Independence <- vector("list", N_Sites)
  Window_List <- vector("list", N_Sites)
  Basin_Events <- data.frame()

  for (i in seq_len(N_Sites)) {
    Site_ID <- Site_List$Site_ID[i]
    Q2 <- Site_List$Q2[i]

    Daily <- read.csv(Daily_Path(Site_ID))
    # The first column is discarded (presumably the row index written by
    # write.csv -- TODO confirm against the script that writes these files).
    Daily <- Daily[, -1, drop = FALSE]
    if (!all(c("Date", "Discharge_cms") %in% names(Daily))) {
      stop("Daily file for site ", Site_ID,
           " must contain Date and Discharge_cms columns.", call. = FALSE)
    }
    Daily$Date <- as.Date(Daily$Date)

    # Clip to the window; which() drops rows whose date could not be parsed
    In_Window <- which(Daily$Date >= Start & Daily$Date <= End)
    Daily <- Daily[In_Window, , drop = FALSE]

    # Left join on the full day index so missing days become NA rows
    Daily <- merge(Day_Indicies, Daily, by = "Date", all.x = TRUE)

    # For all missing data assign 0 (for the portions of missing years)
    Daily$Discharge_cms[is.na(Daily$Discharge_cms)] <- 0

    # Day-to-day change, its sign (-1 / 0 / 1) and the resulting peak flags
    Daily$DeltaQ <- c(0, diff(Daily$Discharge_cms))
    Daily$Score_Q <- sign(Daily$DeltaQ)
    Daily$Peak <- .Flag_Peaks(Daily$Score_Q)
    Data_List[[i]] <- Daily

    # Peak-over-threshold events based on Q2
    Events <- .Extract_POT_Events(Site_ID, Daily$Date_Index,
                                  Daily$Discharge_cms, Daily$Peak, Q2)
    Has_Events <- nrow(Events) > 0

    if (Has_Events) {
      Events$BtoP <- Events$Peak_Time - Events$Base_Time
      Events$PtoB <- Events$End_Time - Events$Peak_Time

      # BTOP / PTOB are only evaluated when the fixed window is requested
      Window_List[[i]] <- if (Use_Median) {
        data.frame(BtoP = round(median(Events$BtoP)),
                   PtoB = round(median(Events$PtoB)))
      } else {
        data.frame(BtoP = BTOP, PtoB = PTOB)
      }

      # The event limits currently are the observed threshold crossings; the
      # temporal window is recorded but not applied to them.
      Events$Event_Base <- Events$Base_Time
      Events$Event_Peak <- Events$Peak_Time
      Events$Event_End <- Events$End_Time

      Independent <- Independence_1(Events)
      Independent$Site_ID <- Site_ID
    } else {
      Window_List[[i]] <- data.frame(BtoP = 0, PtoB = 0)
      Independent <- data.frame(Peak_Discharge = numeric(0))
    }
    Site_Events[[i]] <- Events

    # Express peak discharge as a multiple of the site's Q2
    Independent$Peak_Discharge <- Independent$Peak_Discharge / Q2
    Site_Independence[[i]] <- Independent

    # Pool the site's events; re-sorting after every append (rather than once
    # at the end) keeps the resulting row names unchanged.
    if (Has_Events) {
      Basin_Events <- rbind(Basin_Events, Independent)
      Basin_Events <- Basin_Events[order(Basin_Events$Event_Base,
                                         Basin_Events$Event_Peak,
                                         Basin_Events$Event_End), ]
    }
  }

  Temporal_Window <- do.call(rbind, Window_List)

  # Final data frame of basin specific events
  if (nrow(Basin_Events) > 1) {
    Basin_Independence <- Independence_1(Basin_Events)
  } else {
    # Drops the fifth column (presumably Site_ID, which is appended after the
    # columns returned by Independence_1 -- TODO confirm).
    Basin_Independence <- Basin_Events[, -5]
  }

  if (nrow(Basin_Independence) > 0) {
    Basin_Independence$Event_Duration <-
      Basin_Independence$Event_End - Basin_Independence$Event_Base
  }

  # Gap between the end of one basin event and the start of the next; the
  # last event has no successor and yields NA.
  # NOTE(review): these statistics are computed but neither printed nor
  # returned -- confirm whether they should be reported.
  Interevent <- if (nrow(Basin_Independence) > 1) {
    c(Basin_Independence$Event_Base[-1], NA) - Basin_Independence$Event_End
  } else {
    0
  }
  Mu <- mean(Interevent, na.rm = TRUE)
  Med <- median(Interevent, na.rm = TRUE)
  Max <- max(Interevent, na.rm = TRUE)
  Min <- min(Interevent, na.rm = TRUE)

  if (nrow(Basin_Events) > 0) {
    Basin_Severity <- Severity(Basin_Events, Site_List)
  } else {
    Basin_Severity <- data.frame(Effected = 0, Total_Discharge = 0,
                                 Severity = 0)
  }

  Basin_Specific_Freq <- nrow(Basin_Independence)

  names(Data_List) <- Site_List$Site_ID
  names(Site_Events) <- Site_List$Site_ID
  names(Site_Independence) <- Site_List$Site_ID
  row.names(Temporal_Window) <- Site_List$Site_ID

  # List of sites and their temporal windows
  #print(Site_List)
  #print(Temporal_Window)

  # Site specific events
  print(Site_Events)
  #print(Site_Independence)

  # Basin specific events
  #print(Basin_Events)
  print(Basin_Independence)
  #print(Basin_Specific_Freq)
  print(Basin_Severity)

  Data_List
}
